package com.jspxcms.core.fulltext;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Stack;

/**
 * A {@link TokenFilter} that, for each non-Chinese term (assumed to be a
 * pinyin transliteration), additionally emits its prefixes of length
 * {@code minGram}..{@code maxGram} at the same position (position
 * increment 0), enabling prefix search on pinyin. Terms that still
 * contain CJK ideographs are passed through unchanged.
 */
public class PinyinNGramTokenFilter extends TokenFilter {
	/** Prefix n-grams of the current term, waiting to be emitted. */
	private final Deque<String> gramStack = new ArrayDeque<>();
	/** Captured attribute state of the token whose n-grams are queued. */
	private AttributeSource.State current;
	private final CharTermAttribute cta;
	private final PositionIncrementAttribute pia;
	/** Smallest emitted prefix length (inclusive). */
	private final int minGram;
	/** Largest emitted prefix length (inclusive). */
	private final int maxGram;

	/**
	 * @param input   upstream token stream
	 * @param minGram smallest prefix length to emit, inclusive
	 * @param maxGram largest prefix length to emit, inclusive
	 */
	public PinyinNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
		super(input);
		this.cta = addAttribute(CharTermAttribute.class);
		this.pia = addAttribute(PositionIncrementAttribute.class);
		this.minGram = minGram;
		this.maxGram = maxGram;
	}

	@Override
	public boolean incrementToken() throws IOException {
		// First drain any n-grams queued for the previous term.
		if (!gramStack.isEmpty()) {
			String gram = gramStack.pop();
			// Restore the originating token's attributes, then replace
			// the term text with the prefix n-gram.
			restoreState(current);
			cta.setEmpty();
			cta.append(gram);
			// Emit at the same position as the originating token.
			pia.setPositionIncrement(0);
			return true;
		}
		if (!input.incrementToken()) {
			return false;
		}
		// Terms still containing CJK ideographs pass through untouched.
		if (containsChinese(cta.toString())) {
			return true;
		}
		if (addPinYinNGramWord(cta.toString())) {
			// Remember this token's attributes so the queued n-grams can
			// later be restored and emitted at the same position.
			current = captureState();
		}
		return true;
	}

	/**
	 * BUGFIX: the original class did not override {@code reset()}, so a
	 * reused stream leaked queued n-grams and a stale captured state
	 * from the previous run, violating the TokenStream reuse contract.
	 */
	@Override
	public void reset() throws IOException {
		super.reset();
		gramStack.clear();
		current = null;
	}

	/**
	 * Queues the proper prefixes of {@code s} whose lengths lie in
	 * {@code [minGram, maxGram]} (and are shorter than {@code s}).
	 * BUGFIX: the upper bound is now inclusive — the original loop used
	 * {@code i < maxGram}, silently dropping the maxGram-length prefix.
	 *
	 * @param s term text; may be {@code null}
	 * @return {@code true} if at least one prefix was queued (the
	 *         original returned {@code true} for any non-null input,
	 *         triggering a needless captureState())
	 */
	private boolean addPinYinNGramWord(String s) {
		if (s == null) {
			return false;
		}
		boolean queued = false;
		for (int i = minGram; i <= maxGram && i < s.length(); i++) {
			gramStack.push(s.substring(0, i));
			queued = true;
		}
		return queued;
	}

	/** Returns {@code true} if {@code s} contains any CJK ideograph. */
	private boolean containsChinese(String s) {
		if (s == null || s.trim().isEmpty()) {
			return false;
		}
		for (int i = 0; i < s.length(); i++) {
			if (isChinese(s.charAt(i))) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Returns {@code true} for characters in the CJK Unified Ideographs
	 * block (U+4E00..U+9FFF).
	 * BUGFIX: the original upper bound (171941) exceeds
	 * {@code Character.MAX_VALUE} (65535), so the test degenerated to
	 * {@code v >= 19968} and misclassified Hangul, fullwidth forms and
	 * other scripts as Chinese.
	 */
	private boolean isChinese(char c) {
		return c >= '\u4e00' && c <= '\u9fff';
	}

}
